/*Author: Ilana M Ventura														  */
/*Date: September 2024															  */
/*Step 1: Read in data, recode demographics*/


/* Library references: OUT is the location of output, DTA is the location of the dataset. */
libname out "H:\UChicago\Census Experiment\Census Experiment Replication\Replication Files\out";
libname dta "H:\UChicago\Census Experiment\Census Experiment Replication\Replication Files";

/* NOFMTERR: keep processing when a variable refers to a format that cannot be found. */
options nofmterr;


/* Working copy of the replication dataset; the recode steps below read and rewrite sl_dat. */
data sl_dat;
	set dta.Census_Change_replication_data;
run;


/* Check sampling groups and experimental groups (one-way tables, missing values shown). */
proc freq data=sl_dat;
	tables filter_rand racegroup
		/ list missing norow nocol;
run;

/* Create 0/1 race and ethnicity flags based on self-reports.
   Each flag is 1 when at least one of its source items equals 1 and 0 otherwise;
   a missing source item never equals 1, so it contributes 0. */
data sl_dat;
	set sl_dat;

	/* Hispanic: any of f2b_hisp_m_2 to f2b_hisp_m_5, or f2a_race_2 */
	Hisp = (f2b_hisp_m_2 = 1 or f2b_hisp_m_3 = 1 or
	        f2b_hisp_m_4 = 1 or f2b_hisp_m_5 = 1 or
	        f2a_race_2 = 1);

	White = (f2a_race_1 = 1 or f2b_race_1 = 1);

	Black = (f2a_race_3 = 1 or f2b_race_2 = 1);

	Asian = (f2a_race_4 = 1 or
	         f2b_race_4 = 1 or f2b_race_5 = 1 or
	         f2b_race_7 = 1 or f2b_race_8 = 1 or
	         f2b_race_10 = 1 or f2b_race_11 = 1 or
	         f2b_race_13 = 1);

	AmIndAN = (f2a_race_5 = 1 or f2b_race_3 = 1);

	NHoPI = (f2a_race_7 = 1 or
	         f2b_race_6 = 1 or f2b_race_9 = 1 or
	         f2b_race_12 = 1 or f2b_race_14 = 1);

	MENA = (f2a_race_6 = 1);

	SOR = (f2a_race_8 = 1 or f2b_race_15 = 1);
run;

/* Other demographic recodes: age, gender, skin color scale, income brackets,
   place of birth, education, home ownership, age groups and age at migration.

   Missing-value handling: SAS orders a missing numeric value below every number,
   so an unguarded "x < k" test is TRUE when x is missing. The age-group,
   education and income recodes below therefore test for missing explicitly
   (as the age_migr recode already did); a respondent with a missing source
   value gets 0 on every flag of that family instead of being placed in the
   lowest (age, education) or highest (income) category.
   NOTE(review): this only changes rows where birthyr, Q47 or faminc_new is
   missing - confirm against the data whether any such rows exist. */
data sl_dat;
	set sl_dat;

	/* age in 2023 from year of birth; missing when birthyr is missing */
	age= 2023-birthyr;

	Female = (gender = 2);

	/* RENAME takes effect when the observation is written, so inside this step the
	   variable is still q73. Code 98 is set to missing
	   (presumably a non-substantive code - confirm with the codebook). */
	rename q73= skin_color_scale;
	if q73=98 then q73=.;

	/* Collapse faminc_new into labelled brackets. Keep the longest label
	   ("Prefer not to say") as the first assignment: if Income is not already on
	   the input dataset, the first assignment sets its length.
	   Non-missing codes outside 1-9 and 97 fall into the top bracket, as before;
	   a missing faminc_new leaves Income blank. */
	if faminc_new =97 then Income="Prefer not to say";
		else if faminc_new in (1,2,3)
			then income="Less than $30K";
		else if faminc_new in (4,5,6)
			then income="$30K-$59K";
		else if faminc_new in (7,8,9)
			then income="$60K-$99K";
		else if not missing(faminc_new)
			then income="$100K or more";

	Inc_a = (Income="Less than $30K");
	Inc_b = (Income="$30K-$59K");
	Inc_c = (Income="$60K-$99K");
	Inc_d = (Income="$100K or more");
	Inc_e = (Income="Prefer not to say");

	/* R_US_Born: 1 when the write-in country of birth (Q13aa) exactly matches one of
	   the listed US spellings or places. Matching is exact and case-sensitive, which
	   is why each spelling variant is listed. The list also contains non-informative
	   answers (a lone period, Yes, none, Prefer not to say, Who care), so those are
	   coded as US-born. Any value not listed, including a blank, is coded 0. */
	if Q13aa in ('.', 'amereica',	'America',	'america',	'America Iowa',	'America.',	'American',	
				'Atlantic City', 'Arizona',	'Biumunt', 'Boulder United state of America',	
				'CAlifornia',	'Geo', 'Georgia', 'Gerogia', 'Uni', 'Uninited', 'Yes', 
				'none', 'united', 'United',
				'California',	'Chicago',	'Dallas',	'Delaware',	'Estados unidos',	
				'Estados Unidos de América (EE.UU.)',	'Florida',	'Hawaii',	'Hawaii TH',	
				'Honolulu, Hawaii',	'kingdom of hawaii',	'Little Rock',	'Los Angeles',	
				'los angles',	'Memphis, Tennessee',	'Miami, fla',	'New Jersey',	
				'New York',	'New york',	'New York City',	'North Carolina',	'Pennsylvania',	
				'Rio grand city,tx',	'San diego',	'Territory of United States (Hawai’i)',	
				'Texas',	'The United States',	'The United States of America',	'U S',	
				'U S A',	'U SA',	'United sates',	'United State',	'united state',	
				'United state of America',	'UNITED STATES',	'United States',	
				'United states',	'united states',	'UNITED STATES OF AMERICA',	
				'United States Of America',	'United States of America',	'United States of america',	
				'United states of america',	'United States of America / USA',	
				'United States Virginia',	'UNTIED STATE',	'Untied states',	'USA',	'Usa',	
				'usa',	'washington' ,'Americ', 	'America', 	'American', 	'Bronx', 	
				'Bronx New York', 	'El paso tx', 	'Estados Unidos', 	
				'Estados Unidos de América (EE.UU.)', 	'Fresno', 	'Harrisburg Pennsylvania', 	
				'Hawaii', 	'Hawaii(Territory) became a State.', 	'I was born in the united states', 	
				'Jersey', 	'Los Angeles', 	'NY', 	'New Orleans, Louisiana', 	'New York City', 	
				'Pennsylvania', 	'San Francisco', 	'Texas', 	'U. S', 	'USA-Hawaii', 	'Un', 	
				'Uniited States', 	'United State', 	'United States', 	'United States of America', 	
				'United States of America / USA', 	'United States of America, California', 	
				'United States, California', 	'United states', 	'Uniyes Sya', 	'Washington', 	
				'america', 	'estados unidos', 	'houston texas', 	'long beach', 	'new York', 	
				'phi', 'Prefer not to say', 'Who care')  then R_US_Born=1;
							else R_US_Born=0;

	/* R_Born_LatAmSp: 1 when Q13aa exactly matches one of the listed spellings of
	   Latin American and Caribbean places, Puerto Rico or Spain; 0 otherwise. */
	if Q13aa in ("Argentina","Belize","Brazil","CUBA","Chile","Colombia","Colombian","Cuba","Dominica",
				"Dominican Republic","Ecuador","El Salvador","España","Guatemala","Guyana","Honduras",
				"I was born in Jalisco Mexico","Juárez",
				"MEXICO","Mex","Mexico","México","Nicaragua","PUERTO RICO","Peru","Perú",
				"Puer","Puerto","Puerto Rico","Puerto rico","Republica Dominicana","República Dominicana","Spain",
				"URUGUAY","Venezuala","Venezuela","bolivia","mexico","peru","puerto rico","venezuela", 
				'Argentina', 	'Bolivia', 	'Brazil', 	'COLOMBIA', 	'CUBA', 	'Chile', 	
				'Colo', 	'Colombia', 	'Cuba', 	'Dominican Republic', 	'Ecuador', 	
				'El Salvador', 	'El salvador', 	'Guatemala', 	'Guyana', 	'Honduras', 	'Mexi', 	
				'Mexico', 	'Mx', 	'México', 	'Nicaragua', 	'Peru', 	'Perú', 	'Puerto Rico', 	
				'República Dominicana', 	'Uruguay', 	'Vene', 	'Venezuela', 	'cuba', 	
				'ecuador', 	'guate', 	'mexico', 	'puerto rico', 	'utuado puerto rico')
					then R_Born_LatAmSp=1;
					else	R_Born_LatAmSp=0;

	/* Education flags from Q47: below 14, 14 to 17, and 18 to 21.
	   Other non-missing codes and a missing Q47 get 0 on all three flags. */
	Educ_college = (q47 in (18,19,20,21));
	Educ_LT_HS = (not missing(Q47) and Q47 LT 14);
	Educ_HS = (14 <= Q47 < 18);

	Home_owned = (ownhome = 1);

	/* Age-group flags; a missing age gets 0 on all four */
	age_grp_1 = (not missing(age) and age < 30);	/*18-29*/
	age_grp_2 = (30 <= age < 45);					/*30-44*/
	age_grp_3 = (45 <= age < 65);					/*45-64*/
	age_grp_4 = (age >= 65);						/*65+*/

	/* Age at migration: Q13a minus birth year; Migr_child is 1 only for a
	   non-missing age at migration under 18 */
	age_migr= Q13a-birthyr;
	Migr_child = (not missing(age_migr) and age_migr < 18);
run;



/* Assign Region and Division from inputstate, then build 0/1 flags for region
   and for skin color categories. */
data sl_dat;
	set sl_dat;
	length Region Division $50.;

	/* inputstate codes grouped by division (presumably state FIPS codes - confirm).
	   A code that is not listed leaves Region and Division unassigned. */
	select;
		/* South Region */
		when (inputstate in (1, 21, 28, 47)) do;
			Region = 'South Region';
			Division = 'East South Central Division';
		end;
		when (inputstate in (5, 22, 40, 48)) do;
			Region = 'South Region';
			Division = 'West South Central Division';
		end;
		when (inputstate in (10, 11, 12, 13, 24, 37, 45, 51, 54)) do;
			Region = 'South Region';
			Division = 'South Atlantic Division';
		end;

		/* West Region */
		when (inputstate in (2, 6, 15, 41, 53)) do;
			Region = 'West Region';
			Division = 'Pacific Division';
		end;
		when (inputstate in (4, 8, 16, 30, 32, 35, 49, 56)) do;
			Region = 'West Region';
			Division = 'Mountain Division';
		end;

		/* Northeast Region */
		when (inputstate in (9, 23, 25, 33, 44, 50)) do;
			Region = 'Northeast Region';
			Division = 'New England Division';
		end;
		when (inputstate in (34, 36, 42)) do;
			Region = 'Northeast Region';
			Division = 'Middle Atlantic Division';
		end;

		/* Midwest Region */
		when (inputstate in (17, 18, 26, 39, 55)) do;
			Region = 'Midwest Region';
			Division = 'East North Central Division';
		end;
		when (inputstate in (19, 20, 27, 29, 31, 38, 46)) do;
			Region = 'Midwest Region';
			Division = 'West North Central Division';
		end;

		otherwise;
	end;

	/*if employ=1 then Emp_full_time=1; else Emp_full_time=0;*/

	/* 0/1 region flags */
	Midwest   = (Region = "Midwest Region");
	Northeast = (Region = "Northeast Region");
	South     = (Region = "South Region");
	West      = (Region = "West Region");

	/* 0/1 skin color flags from skin_color_scale: 1-3, 4-5, 6-11 */
	skin_light = (skin_color_scale in (1,2,3));
	skin_med   = (skin_color_scale in (4,5));
	skin_dark  = (skin_color_scale in (6,7,8,9,10,11));

	/* two cases with issues in the skin color, but got info from comment/ other vars */
	if caseid = 1235639264 then skin_light = 1;
	if caseid = 1233429192 then skin_dark = 1;
run;


/* Party identification flags from pid3: 1 sets Democrat, 2 sets Republican,
   3 sets Independent. Any flag not equal to 1 afterwards is set to 0. */
data sl_dat;
	set sl_dat;

	select;
		when (pid3 = 1) Democrat = 1;
		when (pid3 = 2) Republican = 1;
		when (pid3 = 3) Independent = 1;
		otherwise;
	end;

	Democrat    = (Democrat = 1);
	Republican  = (Republican = 1);
	Independent = (Independent = 1);
run;



/*O/S languages*/
/* Language recodes built from the write-in language fields uchi9_language_1 to
   uchi9_language_5 and their language_well_1 to language_well_5 ratings:
     Speak_Spanish_Well   - 1/0, Spanish named in any slot with a rating of 4 or 5,
                            or langpref=2
     Black_discrim        - 0/1 from Q61, left missing for other Q61 values
     English_well         - 1/0 from language_well_0
     Lang_group and the Lang_Bilingual / Lang_Spanish / Lang_English flags
     Speak_other_language - 1/0
   NOTE(review): ratings 4 and 5 are presumably the two highest proficiency levels
   and langpref 1/2 presumably English/Spanish - confirm with the codebook. */
Data sl_dat;
	set sl_dat;

	/* The five slots are tested as one if / else-if chain: the first slot whose text
	   exactly matches a listed spelling AND whose rating is 4 or 5 sets the flag.
	   Matching is exact and case-sensitive. The spelling lists are not the same for
	   every slot: slots 1 and 2 each carry extra variants that slots 3 to 5 lack. */
	if uchi9_language_1 in ("A little Spanish","ESPANOL", "Espanol", "Español", "Mexican",
			"SPANISH","SPanish","Soanish","Some Spanish", "Sp", "Spa","Span","Spani",
			"Spanish","Spanish and Italian","Spanish, (not too proficient)","espanol",
			"español","sp","spa","spanish", "Soanish","Sp", "Spa", "Span", "Spanich"
			, "Spanish", "Spanish and sign Language", "Spanish espanol", "Spanish/Español", 
			"Spanishp", "mexicano", "some spanish", "spanish", "spanish.") 
				and language_well_1 in (4,5) then Speak_Spanish_Well=1;
	else if uchi9_language_2 in ("A little Spanish","ESPANOL", "Espanol", "Español", "Mexican",
			"SPANISH","SPanish","Soanish","Some Spanish", "Sp", "Spa","Span","Spani",
			"Spanish","Spanish and Italian","Spanish, (not too proficient)","espanol",
			"español","sp","spa","spanish", "EL Salvador", "SPANISH" ,"spanisg", 
			"Guatemalan Dialect", "Sp", "Spainsh", "Spanish", "spanish", "spanish-french-spanish") 
				and language_well_2 in (4,5) then Speak_Spanish_Well=1;

	else if uchi9_language_3 in ("A little Spanish","ESPANOL", "Espanol", "Español", "Mexican",
			"SPANISH","SPanish","Soanish","Some Spanish", "Sp", "Spa","Span","Spani",
			"Spanish","Spanish and Italian","Spanish, (not too proficient)","espanol",
			"español","sp","spa","spanish") and language_well_3 in (4,5) then Speak_Spanish_Well=1;

	else if uchi9_language_4 in ("A little Spanish","ESPANOL", "Espanol", "Español", "Mexican",
			"SPANISH","SPanish","Soanish","Some Spanish", "Sp", "Spa","Span","Spani",
			"Spanish","Spanish and Italian","Spanish, (not too proficient)","espanol",
			"español","sp","spa","spanish") and language_well_4 in (4,5) then Speak_Spanish_Well=1;

	else if uchi9_language_5 in ("A little Spanish","ESPANOL", "Espanol", "Español", "Mexican",
			"SPANISH","SPanish","Soanish","Some Spanish", "Sp", "Spa","Span","Spani",
			"Spanish","Spanish and Italian","Spanish, (not too proficient)","espanol",
			"español","sp","spa","spanish") and language_well_5 in (4,5) then Speak_Spanish_Well=1;
			/* final else of the whole chain: no slot qualified */
			else Speak_Spanish_Well=0;
	/* langpref=2 overrides the chain above and always sets the flag to 1 */
	if langpref=2 then Speak_Spanish_Well=1;
	/* Q61 1-2 -> 0, 3-4 -> 1; any other Q61 value leaves Black_discrim unassigned */
	if Q61 in (1,2) then Black_discrim=0;
		else if Q61 in (3,4) then Black_discrim=1;
	if language_well_0 in (4,5) then English_well=1;
		else English_well=0;
	/* Language group, three mutually exclusive categories. Lang_group is first
	   assigned "Bilingual", the longest of the three labels. */
	if (Speak_Spanish_Well=1 and English_well=1) or 
		(langpref=1 and Speak_Spanish_Well=1 )  then do;
		Lang_group="Bilingual";
		Lang_Bilingual=1;
		Lang_Spanish=0;
		Lang_English=0;
			end;
		/* langpref=2 already implies Speak_Spanish_Well=1 (set above), so the second
		   half of this condition adds no cases */
		else if Speak_Spanish_Well=1 or langpref=2 then do;
			Lang_group="Spanish";
			Lang_Spanish=1;
			Lang_Bilingual=0;
			Lang_English=0;
				end;
		else do;
			Lang_group="English";
			Lang_English=1;
			Lang_Spanish=0;
			Lang_Bilingual=0;
				end;

	/* Speak_other_language: uchi9_language=1 with a slot-1 rating of 4 or 5, or anyone
	   classified as bilingual above.
	   NOTE(review): uchi9_language=1 presumably means the respondent reported speaking
	   another language - confirm with the codebook. */
	if uchi9_language=1 and  language_well_1 in (4,5) then Speak_other_language=1;
		else if Lang_Bilingual=1 then Speak_other_language=1;
		else Speak_other_language=0;
			
run;


/* Cross-check the language recodes against their inputs (list format, missing shown). */
proc freq data=sl_dat;
	tables langpref * Speak_Spanish_Well * English_well * Lang_group
		/ list missing norow nocol;
	tables Lang_English * Lang_Spanish * Lang_Bilingual * Lang_group
		/ list missing norow nocol;
	tables Lang_group * Speak_other_language
		/ list missing norow nocol;
run;

/* Recode discrimination: Discrimination is 1 when q58a equals 1 and 0 for every
   other value, including missing. */
data sl_dat;
	set sl_dat;
	Discrimination = (q58a = 1);
run;

/* Check the Discrimination recode against q58 and q58a. */
proc freq data=sl_dat;
	tables q58 * q58a * Discrimination
		/ list missing norow nocol;
run;


/* Recode country of origin into 0/1 flags from the Filter1_m_1 to Filter1_m_3 items.
   Orig_Other is 1 only when none of the three origin flags is set. */
data sl_dat;
	set sl_dat;

	Orig_Mex  = (Filter1_m_1 = 1);
	Orig_PR   = (Filter1_m_2 = 1);
	Orig_Cuba = (Filter1_m_3 = 1);

	Orig_Other = (Orig_Mex = 0 and Orig_PR = 0 and Orig_Cuba = 0);
run;

/* Check the origin flags against their source items. */
proc freq data=sl_dat;
	tables Filter1_m_1 * Orig_Mex
	       * Filter1_m_2 * Orig_PR
	       * Filter1_m_3 * Orig_Cuba
	       * Orig_Other
		/ list missing norow nocol nocum;
run;


	
